In [1]:
import os
data_directory = r'C:\Users\Kevin\Documents\GitHub\msl-iposition-pipeline\examples'
touch_tbt_false_path = os.path.join(data_directory, '2018-04-24_11-35-39_touch_tbt_false.csv')
touch_tbt_true_path = os.path.join(data_directory, '2018-04-24_11-35-03_touch_tbt_true.csv')
desktop_tbt_false_path = os.path.join(data_directory, '2018-04-24_11-32-11_desktop_tbt_false.csv')
desktop_tbt_true_path = os.path.join(data_directory, '2018-04-24_11-33-40_desktop_tbt_true.csv')
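The file names are timestamped and easy to mistype, so it can be worth failing fast if a path is wrong; a minimal sketch (not in the original notebook):

# Fail early if any of the expected files is missing from the data directory.
for path in [touch_tbt_false_path, touch_tbt_true_path,
             desktop_tbt_false_path, desktop_tbt_true_path]:
    assert os.path.exists(path), 'Missing data file: ' + path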
Next, we load the data files. The first row of each file is skipped so that pandas picks up the correct header row.
In [2]:
import pandas as pd
touch_tbt_false_with_practice = pd.read_csv(touch_tbt_false_path, skiprows=[0])
touch_tbt_true_with_practice = pd.read_csv(touch_tbt_true_path, skiprows=[0])
desktop_tbt_false_with_practice = pd.read_csv(desktop_tbt_false_path, skiprows=[0])
desktop_tbt_true_with_practice = pd.read_csv(desktop_tbt_true_path, skiprows=[0])
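A quick sanity check that the load worked as intended never hurts; a hypothetical spot check, not part of the original notebook:

# Spot-check one frame: row/column counts and the column we filter on next.
print(touch_tbt_false_with_practice.shape)
print('subID' in touch_tbt_false_with_practice.columns)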
Next, we remove the practice trials and reset the indices of the data.
In [3]:
# Drop practice trials: keep only rows whose subID does not contain 'practice'.
touch_tbt_false = touch_tbt_false_with_practice[['practice' not in x for x in touch_tbt_false_with_practice['subID']]].reset_index()
touch_tbt_true = touch_tbt_true_with_practice[['practice' not in x for x in touch_tbt_true_with_practice['subID']]].reset_index()
desktop_tbt_false = desktop_tbt_false_with_practice[['practice' not in x for x in desktop_tbt_false_with_practice['subID']]].reset_index()
desktop_tbt_true = desktop_tbt_true_with_practice[['practice' not in x for x in desktop_tbt_true_with_practice['subID']]].reset_index()
data = [touch_tbt_false, touch_tbt_true, desktop_tbt_false, desktop_tbt_true]
labels = ['Touch, Collapsed Accuracy', 'Touch, Trial-by-Trial Accuracy', 'Desktop, Collapsed Accuracy', 'Desktop, Trial-by-Trial Accuracy']
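As an aside, the same filter can be expressed with pandas' vectorized string methods instead of a list comprehension; a sketch for one frame (assuming subID is a string column):

# Equivalent filter: keep rows whose subID does not mention 'practice'.
mask = ~touch_tbt_false_with_practice['subID'].str.contains('practice')
touch_tbt_false = touch_tbt_false_with_practice[mask].reset_index(drop=True)  # drop=True discards the old index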
We can list the columns for convenience, so we get the metric names right in the cells below.
In [4]:
data[0].columns
Out[4]:
The first thing we want to check is whether there is a significant difference in overall misplacement between the conditions. This difference does not depend on the accuracy evaluation method, so we are really only comparing two of the four files, but we plot all of them to make that clear.
In [5]:
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats
%matplotlib inline
metric_column_name = 'Original Misplacement'
means = [x[metric_column_name].mean() for x in data]
# SEM over participants: len(x)/25.0 is the participant count, since each contributes 25 trials.
errors = [x[metric_column_name].std()/np.sqrt(len(x)/25.0) for x in data]
ind = np.arange(len(data))
plt.title(metric_column_name)
plt.bar(ind, means, yerr=errors)
plt.ylabel('Mean ' + metric_column_name)
plt.xticks(ind, labels, rotation=20)
# Welch's t-test for a touch vs. desktop difference (the accuracy method doesn't affect this metric).
print(stats.ttest_ind(data[0][metric_column_name], data[2][metric_column_name], equal_var=False))
plt.show()
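If an effect size is wanted to accompany the t-test, Cohen's d is straightforward to compute; a minimal sketch (pooled-standard-deviation variant, not part of the original analysis):

def cohens_d(a, b):
    # Pooled-SD Cohen's d for two independent samples (pandas .std() uses ddof=1).
    na, nb = len(a), len(b)
    pooled_sd = np.sqrt(((na - 1) * a.std()**2 + (nb - 1) * b.std()**2) / (na + nb - 2))
    return (a.mean() - b.mean()) / pooled_sd

print(cohens_d(data[0][metric_column_name], data[2][metric_column_name]))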
Finally, we can check for group and analysis-type differences in our accuracy measures. There are better ways to do this (one option is sketched below), but the expected result is that these values differ by analysis type, because we draw the accuracy circle in two different ways, and not by group. The ANOVA should therefore be significant, and only the t-tests for analysis type, not the ones for group, should reach significance.
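One of those better ways would be a 2x2 factorial ANOVA that crosses group with analysis type and also tests their interaction; a sketch using statsmodels (the long-format reshaping here is an assumption about how one might stack our four frames, not code from the original):

import statsmodels.api as sm
from statsmodels.formula.api import ols

# Stack the four conditions into one long-format frame with explicit factor columns.
long_data = pd.concat(
    [df.assign(group=label.split(', ')[0], analysis=label.split(', ')[1])
     for df, label in zip(data, labels)],
    ignore_index=True)
model = ols('Q("Original Misplacement") ~ C(group) * C(analysis)', data=long_data).fit()
print(sm.stats.anova_lm(model, typ=2))  # main effects plus the group x analysis interaction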
In [6]:
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats
%matplotlib inline
metric_column_names = ['Accurate Single-Item Placements', 'Accurate Misassignment',
'Rotation Theta', 'Scaling', 'Translation Magnitude',
'TranslationX', 'TranslationY', 'True Swaps', 'Cycle Swaps']
for metric_column_name in metric_column_names:
    means = [x[metric_column_name].mean() for x in data]
    # SEM over participants: len(x)/25.0 is the participant count, since each contributes 25 trials.
    errors = [x[metric_column_name].std()/np.sqrt(len(x)/25.0) for x in data]
    ind = np.arange(len(data))
    plt.title(metric_column_name)
    plt.bar(ind, means, yerr=errors)
    plt.ylabel('Mean ' + metric_column_name)
    plt.xticks(ind, labels, rotation=20)
    print('Analysis for ' + metric_column_name)
    print('One-Way ANOVA for Analysis/Group Differences')
    print(stats.f_oneway(*[list(x[metric_column_name].values) for x in data]))
    print('T-Tests (don\'t bother looking at these if the previous test isn\'t significant)')
    print('______________')
    print('T-Test for Analysis Type Difference in Touch Group')
    print(stats.ttest_ind(data[0][metric_column_name], data[1][metric_column_name], equal_var=False))
    print('T-Test for Analysis Type Difference in Desktop Group')
    print(stats.ttest_ind(data[2][metric_column_name], data[3][metric_column_name], equal_var=False))
    print('T-Test for Group Difference in Collapsed Analysis')
    print(stats.ttest_ind(data[0][metric_column_name], data[2][metric_column_name], equal_var=False))
    print('T-Test for Group Difference in Trial-by-Trial Analysis')
    print(stats.ttest_ind(data[1][metric_column_name], data[3][metric_column_name], equal_var=False))
    plt.show()  # Show each metric's bar chart before starting the next one.
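Because the loop runs an ANOVA plus four t-tests per metric, the p-values could additionally be corrected for multiple comparisons; a sketch with statsmodels, assuming the p-values were first collected into a list (the values below are placeholders):

from statsmodels.stats.multitest import multipletests

pvals = [0.003, 0.04, 0.21]  # placeholder p-values; gather the real ones inside the loop above
# Holm's step-down method controls the family-wise error rate across all tests.
reject, corrected, _, _ = multipletests(pvals, alpha=0.05, method='holm')
print(list(zip(corrected, reject)))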